Case Study Data
- IBIMS region as study area
- Strava Metro Counts as Exposure
- BikeMaps near misses and collisions as incidents
- ICBC left out due to snapping of incidents to nearest intersection or midblock, gives a potentially very misleading view of the spatial distribution of crashes, which will become more problematic with network kernel density techniques
library(tidyr)
library(raster)
library(spatstat)
library(maptools)
library(sf)
library(dplyr)
library(ggplot2)
library(mapview)
#load functions
file.sources = list.files(path = paste0(getwd(),"/R/functions/"),pattern="*.R$",full.names = TRUE)
sapply(file.sources,source,.GlobalEnv)
study_area <- read_sf(paste0(getwd(),"/input/processed/study_area.gpkg")) %>% st_union()
network <- read_sf(paste0(getwd(),"/input/processed/edge_ec_201601_201709_total.gpkg"))
incidents <- read_sf(paste0(getwd(),"/input/processed/inc_bm_201601_201709_snp_30m.gpkg")) %>%
filter(st_intersects(x = ., y = st_as_sfc(st_bbox(study_area)), sparse = FALSE))
ggplot() +
geom_sf(data = network,color="lightgrey",alpha=0.05)+
geom_sf(data = incidents,aes(color=p_type),show.legend = "point")+
scale_color_brewer(name = "Incident Type",
type = "qual")+
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.minor = element_blank() ,
panel.grid.major = element_blank() ,
axis.title = element_blank(),
legend.position = "left") + ggtitle("Bicycling Incidents in the Victoria Region: \nJanuary 2016 - September 2017")

mapview(network,z="CAADB") + mapview(incidents, z= "p_type")
Planar Kernel Density Surfaces
Kernel Density estimations transform point data to a continuous surface:
\[\lambda(z)= \sum_{i=1}^{n} \frac{1}{\pi \tau^2} k(\frac{d_{iz}}{\tau})y_i\] Where,
\(\lambda\)(z) = density at location z;
\(\tau\) is bandwidth distance;
\(k\) is the kernel function, typically a function of the ratio of \(d_{iz}\) to \(\tau\);
\(d_{iz}\) is the distance from event \(i\) to location \(z\).
Using the Quartic function:
\[\lambda(z)= \sum_{i=1}^{n} \frac{1}{\pi \tau^2}(\frac{3}{\pi}(1-\frac{d_{iz}^2}{\tau^2}))y_i\]
- We use the density.ppp function from the spatstat package
- We test 4 bandwidth (\(\tau\)) values: 50m,100m,250m and 500m
- We compare hotspots derived by taking the top 10%, 5%, 1% and 0.1% of raster cells
- We keep the cell size constant at 10m x 10m
Hotspots Interactive Map
- Hotspots displayed as a contour map
- Each bandwidth layer can be toggled on and off by clicking the layers icon, below the zoom buttons, on the left hand side
# study_area_sp <- as(st_as_sfc(st_bbox(study_area)),"Spatial")
study_area_sp <- as(study_area,"Spatial")
inc_sp <- as(incidents,"Spatial")
inc_ppp <- as(inc_sp,"ppp") #create ppp object from bikemaps points
Window(inc_ppp) <- as(study_area_sp,"owin") #define study extent in ppp object
bandwidth <- c(50,100,150,250)
inc_planar <- lapply(1:length(bandwidth),function(x){
density(inc_ppp,
kernel = "quartic",
sigma = bandwidth[x],
eps = 10,
positive=TRUE)
}) #kernel density images
inc_planar_r <- lapply(inc_planar,function(x) raster(x,crs = CRS('+init=EPSG:26910'))) #convert to raster file
names(inc_planar_r) <- bandwidth#convert to raster file
incident_hotspots <- lapply(inc_planar_r, function(x) detect_planar_hotspot(x,percentiles = c(0.90,0.95,0.99,0.999)))
#interactive plot
mapview(incident_hotspots,alpha.regions=0.25)
Hotspots Map by Threshold and Bandwidth
incident_hotspots_cmbnd <- do.call(what = sf:::rbind.sf, args = incident_hotspots) %>%
mutate(bw = row.names(.)) %>%
separate(col = bw,into = c("bw",NA)) %>%
mutate(bw=factor(bw,levels = as.character(bandwidth)))
pal <- RColorBrewer::brewer.pal(6,name = "Reds")[-(1:2)]
ggplot()+
geom_sf(data=st_union(study_area),fill="snow1") +
geom_sf(data=incident_hotspots_cmbnd,aes(fill=layer,colour=layer)) +
scale_colour_manual(values = pal) +
scale_fill_manual(values = pal) +
facet_grid(bw~layer) +
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.minor = element_blank() ,
panel.grid.major = element_blank() ,
axis.title = element_blank(),legend.position = "none") +
coord_sf() +
ggtitle("Incident Kernel Density: By Threshold (Columns) & Bandwidth (rows)")
Quantifying differences
hs <- lapply(1:nrow(incident_hotspots_cmbnd),
function(x) st_sf(st_cast(st_union(incident_hotspots_cmbnd[x,]), "POLYGON")) %>%
mutate(area= st_area(.)))
qe <- tibble(threshold = incident_hotspots_cmbnd$layer,
bandwidth = incident_hotspots_cmbnd$bw,
m2 = sapply(hs,function(x){sum(x$area)}),
avg_m2 = sapply(hs,function(x){mean(x$area)}),
n = sapply(hs,FUN = nrow))
qe %>%
select(threshold,bandwidth,n) %>%
pivot_wider(names_from = bandwidth,values_from = n) %>%
knitr::kable()
| 0.9 |
2302 |
69 |
34 |
10 |
| 0.95 |
110 |
91 |
28 |
9 |
| 0.99 |
137 |
17 |
10 |
6 |
| 0.999 |
13 |
4 |
2 |
1 |
- Number of hotspots by threshold and bandwidth
- Larger bandwidth results in less distinct hotspots (e.g instead of many distinct hotspots, few larger hotspots)
- Higher threshold results in fewer hotspots (generally) but this difference is much more pronounced for smaller bandwidths than for larger ones
- e.g. 6 distinct hotspots for a 500m bandwidth with 90% threshold, compared to 1 with 99.9% threshold. Compare this with 2302 distinct hotspots for a 50m bandwidth with a 90% threshold and 13 for a 99.9% threshold
pal <- RColorBrewer::brewer.pal(6,name = "Purples")[-(1:2)]
qe %>%
ggplot(aes(x=bandwidth,y=avg_m2,colour=threshold,group=threshold)) +
geom_point() +
scale_colour_manual(values = pal) +
geom_line() + theme_minimal()

- Increasing the bandwidth results in larger and fewer hotspots
- Decreasing the bandwidth you an increased number of hotspots of a smaller size
Network Kernel Density Surfaces
To produce density estimates on network:
\[\lambda(z)= \sum_{i=1}^{n} \frac{1}{\tau} k(\frac{d_{iz}}{\tau})y_i\]
Using the Quartic function:
\[\lambda(z)= \sum_{i=1}^{n} \frac{1}{\tau}(\frac{3}{\pi}(1-\frac{d_{iz}^2}{\tau^2}))y_i\]
- I created function in R to implement network based KDE from algorithm outlined in Xie & Yan (2008)
- The network KDE is a a 1-D version of the planar kernel density estimator, with \(\tau\) (bandwidth) based on network distances, rather than Euclidean distances
- Network split into “lixels” (1-D version of pixels)
- We test 4 bandwidth (\(\tau\)) values: 50m,100m,250m and 500m
- We compare hotspots derived by taking the top 10%, 5%, 1% and 0.1% of lixel density values
- We keep the lixel size constant at 10m.
Hotspots Interactive Map
- Hotspots displayed as a contour map
- Each bandwidth layer can be toggled on and off by clicking the layers icon, below the zoom buttons, on the left hand side
inc_network_kde <- read_sf(paste0(getwd(),"/output/bikemaps_kde_10m_lixel_50m_250m_bw.gpkg"))
cols_vec <- c("kde_bw_50","kde_bw_100","kde_bw_150","kde_bw_250")
inc_network_hotspots <- lapply(cols_vec, function(x) detect_network_hotspot(inc_network_kde,x,percentiles = c(0.90,0.95,0.99,0.999)))
names(inc_network_hotspots) <- bandwidth
#interactive plot
mapview(inc_network_hotspots,zcol=list("thresholds","thresholds","thresholds","thresholds"))
Hotspots Map by Threshold and Bandwidth
#plot in ggplot
pal <- RColorBrewer::brewer.pal(6,name = "Reds")[-(1:2)]
inc_network_hotspots_cmbnd <- do.call(what = sf:::rbind.sf, args = inc_network_hotspots) %>%
mutate(bw = row.names(.)) %>%
separate(col = bw,into = c("bw",NA)) %>%
mutate(bw = factor(bw, levels = as.character(bandwidth)))
ggplot()+
geom_sf(data=st_union(study_area),fill="snow1") +
geom_sf(data=inc_network_hotspots_cmbnd,aes(fill=thresholds,colour=thresholds)) +
scale_colour_manual(values = pal) +
scale_fill_manual(values = pal) +
facet_grid(bw~thresholds) +
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.minor = element_blank() ,
panel.grid.major = element_blank() ,
axis.title = element_blank(),legend.position = "none") +
coord_sf() +
ggtitle("Network Kernel Density: Percentile Threshold (Columns), Bandwidth (rows)")

Quantifying Differences
hs <- lapply(1:nrow(inc_network_hotspots_cmbnd),
function(x) st_sf(
st_cast(
st_union(
st_buffer(
inc_network_hotspots_cmbnd[x,],dist=0.5
)
), "POLYGON"
)
) %>% mutate(area= st_area(.)))
qe <- tibble(threshold = inc_network_hotspots_cmbnd$thresholds,
bandwidth = inc_network_hotspots_cmbnd$bw,
m = sapply(hs,function(x){sum(x$area)}),
avg_m = sapply(hs,function(x){mean(x$area)}),
n = sapply(hs,FUN = nrow))
qe %>%
select(threshold,bandwidth,n) %>%
pivot_wider(names_from = bandwidth,values_from = n) %>%
knitr::kable()
| 0.9 |
169 |
133 |
114 |
83 |
| 0.95 |
169 |
133 |
123 |
121 |
| 0.99 |
181 |
50 |
44 |
20 |
| 0.999 |
34 |
12 |
9 |
3 |
- Number of hotspots by threshold and bandwidth
- Larger bandwidth results in less distinct hotspots (e.g instead of many distinct hotspots, few larger hotspots)
- Higher threshold results in fewer hotspots (generally) but this difference is much more pronounced for smaller bandwidths than for larger ones
- e.g. 6 distinct hotspots for a 500m bandwidth with 90% threshold, compared to 1 with 99.9% threshold. Compare this with 2302 distinct hotspots for a 50m bandwidth with a 90% threshold and 13 for a 99.9% threshold
pal <- RColorBrewer::brewer.pal(6,name = "Purples")[-(1:2)]
qe %>%
ggplot(aes(x=bandwidth,y=avg_m,colour=threshold,group=threshold)) +
geom_point() +
scale_colour_manual(values = pal) +
geom_line() + theme_minimal()

- Increasing the bandwidth results in larger and fewer hotspots
- Decreasing the bandwidth you an increased number of hotspots of a smaller size
#Integrating Exposure
Previous hotspots represent areas of high crash burden
To estimate risk we need to integrate information on exposure
We take a simple approach to estimating crash risk density by dividing a kernel density estimate of crashes over a kernel density estimate of exposure: \[\lambda(z_{r}) = \frac{\lambda(z_{events})}{\lambda(z_{exposure})}\]
This can be applied to either planar or network based density estimates
Planar Kernel Density
- To estimate a planar exposure surface we use density.psp from spatstat package
- Weighted by strava counts
- We compare hotspots derived by taking the top 10%, 5%, 1% and 0.1% of pixel density values
# Create planar density surface for exposure
network_sp <- as(network,"Spatial")
exp_psp <- as(network_sp[,"CAADB"],"psp")
Window(exp_psp) <- as(study_area_sp,"owin") #define study extent in ppp object
exp_planar <- lapply(1:length(bandwidth),function(x){
density.psp(exp_psp,
kernel = "quartic",
sigma = bandwidth[x],
eps = 10,
weights = exp_psp$marks)
}) #kernel density images
#recale exposure density to be positive values
exp_planar_r <- lapply(exp_planar,function(x) raster(x,crs = CRS('+init=EPSG:26910'))) #convert to raster file
for(i in 1:length(exp_planar_r)) {
if(any(exp_planar_r[[i]]@data@values<0,na.rm=TRUE)){
exp_planar_r[[i]]@data@values <- scales::rescale(exp_planar_r[[i]]@data@values,
to=c(0.000001,max(exp_planar_r[[i]]@data@values,na.rm = TRUE)))
}
}
## Divide incident raster surface by exposure planar surface
risk_planar <- mapply(`/`,inc_planar_r,exp_planar_r)
#find risk hotspots
risk_hotspots <- lapply(risk_planar, function(x) detect_planar_hotspot(x,percentiles = c(0.90,0.95,0.99,0.999)))
#interactive plot
mapview(risk_hotspots,alpha.regions=0.25)
#Plot in ggplot2
risk_hotspots_cmbnd <- do.call(what = sf:::rbind.sf, args = risk_hotspots) %>%
mutate(bw = row.names(.)) %>%
separate(col = bw,into = c("bw",NA)) %>%
mutate(bw=factor(bw,levels = as.character(bandwidth)))
pal <- RColorBrewer::brewer.pal(6,name = "Reds")[-(1:2)]
ggplot()+
geom_sf(data=st_union(study_area),fill="snow1") +
geom_sf(data=risk_hotspots_cmbnd,aes(fill=layer,colour=layer)) +
scale_colour_manual(values = pal) +
scale_fill_manual(values = pal) +
facet_grid(bw~layer) +
theme_minimal() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.minor = element_blank() ,
panel.grid.major = element_blank() ,
axis.title = element_blank(),legend.position = "none") +
coord_sf() +
ggtitle("Risk Kernel Density: Percentile Threshold (Columns), Bandwidth (rows)")

Network Kernel Density
- We compare hotspots derived by taking the top 10%, 5%, 1% and 0.1% of lixel density values
exposure_network_kde <- read_sf(paste0(getwd(),"/output/strava_kde_10m_lixel_50m_250m_bw.gpkg"))
risk_network_kde <- left_join(inc_network_kde,
exposure_network_kde %>% st_drop_geometry(),
by = c("ID","LIXID","length"))
risk_network_kde <- risk_network_kde %>%
mutate(risk_kde_bw_50 = kde_bw_50.x/kde_bw_50.y,
risk_kde_bw_100 = kde_bw_100.x/kde_bw_100.y,
risk_kde_bw_150 = kde_bw_150.x/kde_bw_150.y,
risk_kde_bw_250 = kde_bw_250.x/kde_bw_250.y
)
cols_vec <- c("risk_kde_bw_50","risk_kde_bw_100","risk_kde_bw_150","risk_kde_bw_250")
risk_network_hotspots <- lapply(cols_vec,
function(x) detect_network_hotspot(risk_network_kde,
x,
percentiles = c(0.90,0.95,0.99,0.999)))
names(risk_network_hotspots) <- bandwidth
#interactive plot
mapview(risk_network_hotspots,zcol=list("thresholds","thresholds","thresholds","thresholds"))